Unix-only shell function that counts the lines of code in all .R and .Rmd files

# Count the total number of lines in all .R and .Rmd files under a directory,
# skipping one excluded subtree.
#
# Arguments:
#   $1 - directory to search
#   $2 - path to prune from the search; it is matched with find's -path, so it
#        has to be spelled the way find reports it (e.g. "./packrat" for ".")
# Outputs:
#   A one-line summary containing the total, written to stdout.
# Returns:
#   1 when either argument is missing or empty.
count_lines_of_code() {
    local directory="$1"
    local exclude_directory="$2"
    local total_lines=0

    if [ -z "$directory" ]; then
        echo "Please provide a directory."
        return 1
    fi

    if [ -z "$exclude_directory" ]; then
        echo "Please provide a directory to exclude."
        return 1
    fi

    # Let find pass the matching files straight to cat instead of looping over
    # $(find ...): that unquoted expansion split paths on whitespace, so files
    # or directories with a space in their name were miscounted.
    # The newline count of the concatenation equals the sum of per-file counts,
    # and the arithmetic expansion strips any padding wc puts around the number.
    total_lines=$(( $(find "$directory" -path "$exclude_directory" -prune -o -type f \( -name "*.R" -o -name "*.Rmd" \) -exec cat {} + | wc -l) ))

    echo "Total lines of code in .R and .Rmd files in $directory excluding $exclude_directory: $total_lines"
}

# Count the lines under the current directory, skipping the ./packrat subtree
count_lines_of_code "." "./packrat"
## Total lines of code in .R and .Rmd files in . excluding ./packrat: 1258
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'plotly'
## 
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# Run the import script in knitr's global environment without echoing its code.
# FALSE is spelled out because the shorthand F is an ordinary, reassignable variable.
source("data_import.R", echo = FALSE, local = knitr::knit_global())
## 
## Attaching package: 'rio'
## The following object is masked from 'package:plotly':
## 
##     export
# Register the code chunks defined in data_import.R with knitr
knitr::read_chunk("data_import.R")

Liczba rekordów we wszystkich ramkach danych

# Total number of rows over every data frame currently defined here:
# fetch each object by name, keep only the data frames and add up nrow().
total_rows <- sum(
  ls() %>%
    set_names() %>%
    map(function(object_name) get(object_name)) %>%
    keep(is.data.frame) %>%
    map_int(nrow)
)

total_rows
## [1] 141812
# Check that the data frames together hold more than 30000 rows
total_rows > 30000
## [1] TRUE

Czyszczenie danych

Usuwanie kolumny “Pos”

Aby uniknąć duplikatów w kolumnach, należy usunąć kolumnę “Pos”

# Register the code chunks defined in cleaning.R with knitr.
# Sourcing the script directly is currently disabled:
# source("cleaning.R", echo = F, local = knitr::knit_global())
knitr::read_chunk("cleaning.R")

TODO - sklasyfikować, jakie gatunki są najchętniej słuchane (relacja: artysta - gatunek) na wszystkich platformach; użyć MusicOSet

# Artist metadata from MusicOSet
OSet_artists <- import("clean_data/musicoset_metadata/artists.csv")

# Collapse the detailed genre labels into a handful of broad families.
# The rules are tried from top to bottom and the first match wins; a label
# that matches none of them is kept unchanged.
OSet_artists <- mutate(
  OSet_artists,
  main_genre = case_when(
    grepl("rap|hip hop|drill", main_genre) ~ "rap",
    grepl("rock", main_genre) ~ "rock",
    grepl("country", main_genre) ~ "country",
    grepl("r&b", main_genre) ~ "r&b",
    grepl("edm", main_genre) ~ "edm",
    grepl("pop", main_genre) ~ "pop",
    grepl("soul", main_genre) ~ "soul",
    .default = as.character(main_genre)
  )
)

# Artist name together with its unified genre
OSet_artists_genre <- select(OSet_artists, name, main_genre)

Przetwarzanie, aby uzyskać ramkę danych z najchętniej słuchanymi gatunkami i ramkę danych z najchętniej słuchanymi artystami ogólnie

# Data frames provided by the import step:
# eu_artisttotals - only in europe
# eua_artisttotals - based on albums - only in europe
# ww_artisttotals
# spotify_listeners
# youtube_archive
# apple_songs_artisttotals
# apple_songs_eu_artisttotals - only in europe


# Rename the genre lookup columns so the table can be joined on "Artist"
colnames(OSet_artists_genre) <- c("Artist", "Genre")

# Combine the worldwide, Spotify, YouTube and Apple Music tables with the genre
# lookup. Every step is an inner join on Artist, so only artists present in all
# five tables remain.
# NOTE(review): a suffix is only applied to non-key columns whose name exists
# on both sides at that particular step, so the resulting column names depend
# on the order of the joins - confirm before reordering them.
artists_genre_comparison <- ww_artisttotals%>%
  inner_join(
    spotify_listeners,
    by = "Artist",
    suffix = c(".ww", ".spotify")
  ) %>%
  inner_join(
    youtube_archive,
    by = "Artist",
    suffix = c(".ww", ".youtube")
  ) %>%
  inner_join(
    apple_songs_artisttotals,
    by = "Artist",
    suffix = c(".ww", ".apple")
  ) %>%
  inner_join(
    OSet_artists_genre,
    by = "Artist",
    suffix = c(".ww", ".genre")
  )

# Spot check that there are no duplicate rows: look at the rows of one artist
artists_genre_comparison %>%
  filter(Artist == "Adele")
##   Artist   Total.ww Today.ww 1M.ww 2M.ww 3M.ww 4M.ww 5M.ww  Listeners
## 1  Adele 82,855,252    8,797    10     8     6     5     5 50,515,140
##   Daily Trend Peak PkListeners Total.youtube 100M      Total Today.apple
## 1     104,639   31  58,244,524      13,707.3   18 11,442,493       1,377
##   1M.apple 2M.apple 3M.apple 4M.apple 5M.apple Genre
## 1        3        1        1        1        0  soul
# Column cleaning: strip the thousands separators from the count columns
artists_genre_comparison <- artists_genre_comparison %>%
  mutate(across(
    all_of(c("Total.youtube", "Total.ww", "PkListeners", "Total", "Listeners")),
    ~ str_replace_all(.x, ",", "")
  ))

# Turn the cleaned text into numbers and give the platform totals uniform names.
# NOTE(review): base transform() may rewrite non-syntactic column names when it
# rebuilds the data frame - confirm nothing relies on the original spelling.
artists_genre_comparison <- artists_genre_comparison %>%
  transform(
    Total.youtube = as.numeric(Total.youtube),
    Total.ww = as.numeric(Total.ww),
    PkListeners = as.numeric(PkListeners),
    Total = as.numeric(Total),
    Listeners = as.numeric(Listeners)
  ) %>%
  mutate(
    # Total.youtube holds the total views in millions; scale it to plain views
    Total.youtube = Total.youtube * 1000000
  ) %>%
  rename(Total.spotify = PkListeners, Total.apple = Total)


# Combined figure per artist: the YouTube, worldwide, Apple Music and Spotify
# totals added together, with the largest values first
artists_genre_comparison <- arrange(
  mutate(
    artists_genre_comparison,
    Sum_listeners_views_across = Total.youtube + Total.ww + Total.apple + Total.spotify
  ),
  desc(Sum_listeners_views_across)
)

# Number of artists per genre, most frequent genre first
preferable_genres <- artists_genre_comparison %>%
  group_by(Genre) %>%
  count() %>%
  arrange(desc(n))

head(preferable_genres)
## # A tibble: 6 × 2
## # Groups:   Genre [6]
##   Genre       n
##   <chr>   <int>
## 1 pop        88
## 2 rap        26
## 3 rock       11
## 4 latin       6
## 5 country     5
## 6 r&b         4

Wizualizacja - które gatunki są najbardziej słuchane ogólnie

# Bar chart of the six genres with the most artists, tallest bar first.
# The title previously read "Najczęciej" (missing "ś").
ggplot(
  head(preferable_genres),
  aes(x = reorder(Genre, -n), y = n)
) +
  geom_col(width = 0.5, fill = "#581845") +
  labs(title = "Najczęściej słuchane gatunki", x = "", y = "") +
  theme_fivethirtyeight() +
  scale_fill_fivethirtyeight()

Wizualizacja - najchętniej słuchani artyści na wszystkich platformach - summarized

# The 100 artists with the largest combined total across all platforms
top_artists_total <- artists_genre_comparison %>%
  select(Artist, Sum_listeners_views_across) %>%
  top_n(100, wt = Sum_listeners_views_across)

# Split the artists into three tiers so each tier can get its own label size
top_labels <- top_artists_total %>%
  top_n(20, wt = Sum_listeners_views_across)

bot_labels <- top_artists_total %>%
  top_n(-18, wt = Sum_listeners_views_across)

# Middle tier: every artist in neither the top nor the bottom tier.
# Selecting by membership instead of by row position (21:(n - 18)) keeps the
# tiers disjoint when there are ties and yields an empty tier, not a
# backwards index range, when fewer than 39 artists are available.
mid_labels <- top_artists_total %>%
  filter(!Artist %in% c(top_labels$Artist, bot_labels$Artist))
# Bubble chart of the top artists on a log-scaled y axis: point size follows
# the log of the combined total, opacity follows the total itself, and only
# the top tier of artists is labelled.
ggplot(
  top_artists_total,
  aes(
    x = Artist,
    y = Sum_listeners_views_across,
    size = log(Sum_listeners_views_across)
  )
) +
  geom_point(
    aes(alpha = Sum_listeners_views_across),
    shape = 21,
    color = "#7e7b77",
    fill = "#edc491"
  ) +
  geom_text(
    data = top_labels,
    aes(label = Artist),
    size = 2.5,
    color = "#1b2322",
    hjust = 0.5,
    vjust = 0.5
  ) +
  labs(
    title = "Najczęściej streamowani artyści",
    subtitle = "Globalnie - sumaryzacja",
    x = "Artysta",
    y = "Ilość streamów"
  ) +
  scale_size(range = c(1, 20)) +
  scale_alpha_continuous(range = c(0.3, 1)) +
  scale_y_log10() +
  theme_fivethirtyeight() +
  scale_fill_fivethirtyeight() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
    panel.grid.minor = element_line(color = "grey95", linewidth = 0.25),
    plot.title = element_text(size = 13),
    plot.subtitle = element_text(size = 10)
  )

# TODO: repel only the labels that actually need it

# TODO: build the same chart for Poland

Wersja interactive

# Interactive (plotly) version of the bubble chart; all three label tiers are
# drawn, each with a smaller text size than the one before.
ggplotly(
  ggplot(
    top_artists_total,
    aes(
      x = Artist,
      y = Sum_listeners_views_across,
      size = log(Sum_listeners_views_across)
    )
  ) +
    geom_point(
      aes(alpha = Sum_listeners_views_across),
      shape = 21,
      color = "#7e7b77",
      fill = "#edc491"
    ) +
    geom_text(
      data = top_labels,
      aes(label = Artist),
      size = 2.5,
      color = "#1b2322",
      hjust = 0.5,
      vjust = 0.5
    ) +
    geom_text(
      data = mid_labels,
      aes(label = Artist),
      size = 2,
      color = "#1b2322",
      hjust = 0.5,
      vjust = 0.5
    ) +
    geom_text(
      data = bot_labels,
      aes(label = Artist),
      size = 1,
      color = "#1b2322",
      hjust = 0.5,
      vjust = 0.5
    ) +
    labs(
      title = "Najczęściej streamowani artyści",
      subtitle = "Globalnie - wersja interaktywna",
      x = "Artysta",
      y = "Ilość streamów"
    ) +
    scale_size(name = "Sum of Listeners/Views", range = c(3, 15)) +
    scale_alpha_continuous(range = c(0.3, 1)) +
    scale_y_log10() +
    theme_fivethirtyeight() +
    scale_fill_fivethirtyeight() +
    theme(
      legend.position = "none",
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
      panel.grid.minor = element_line(color = "grey95", linewidth = 0.25),
      plot.title = element_text(size = 13),
      plot.subtitle = element_text(size = 10)
    )
)

Najchętniej słuchane gatunki na Spotify - cały świat

# Number of Spotify artists per unified genre, most frequent genre first.
preferable_genres_spotify <- suppressWarnings({
  spotify_artists %>%
    # str_replace_all() removes every thousands separator; str_replace() only
    # removed the first one, so values with two or more commas became NA.
    mutate(across(
      c("Streams", "Daily", "As lead", "Solo", "As feature"),
      ~ str_replace_all(., ",", "")
    )) %>%
    mutate(across(
      c("Streams", "Daily", "As lead", "Solo", "As feature"),
      ~ as.numeric(.) * 1000000
    )) %>%
    left_join(
      OSet_artists_genre,
      by = "Artist",
      suffix = c(".spotify", ".oset")
    ) %>%
    # Drop only the rows that found no genre in the join; a bare drop_na()
    # also discarded artists with a missing value in any other column.
    drop_na(Genre) %>%
    select(
      Artist, Genre, Streams
    ) %>%
    group_by(Genre) %>%
    count() %>%
    arrange(desc(n))
})


head(preferable_genres_spotify)
## # A tibble: 6 × 2
## # Groups:   Genre [6]
##   Genre       n
##   <chr>   <int>
## 1 pop       366
## 2 rap       205
## 3 rock      101
## 4 country    68
## 5 latin      39
## 6 r&b        36
# Bar chart of the six most frequent genres among Spotify artists
ggplot(
  head(preferable_genres_spotify),
  aes(x = reorder(Genre, -n), y = n)
) +
  geom_col(width = 0.5, fill = "#6cd980") +
  labs(
    title = "Najczęściej słuchane gatunki na Spotify",
    subtitle = "Globalnie",
    x = "",
    y = ""
  ) +
  theme_fivethirtyeight() +
  scale_fill_fivethirtyeight() +
  theme(
    plot.title = element_text(size = 13),
    plot.subtitle = element_text(size = 10)
  )

Wizualizacja - najchętniej słuchani artyści na Spotify - cały świat - podsumowanie

# TODO: group the data frames by the figures of the individual services

# The 100 artists with the highest Spotify figure
top_artists_spotify <- artists_genre_comparison %>%
  select(Artist, Total.spotify) %>%
  top_n(100, wt = Total.spotify)

# Split the artists into three tiers so each tier can get its own label size
top_labels <- top_artists_spotify %>%
  top_n(20, wt = Total.spotify)

bot_labels <- top_artists_spotify %>%
  top_n(-18, wt = Total.spotify)

# Middle tier: every artist in neither the top nor the bottom tier.
# The rows are still ordered by the combined cross-platform total rather than
# by Total.spotify, so taking rows 21..(n - 18) by position picked artists
# that already belong to the other tiers and skipped genuine mid-tier ones.
mid_labels <- top_artists_spotify %>%
  filter(!Artist %in% c(top_labels$Artist, bot_labels$Artist))
# Bubble chart of the top Spotify artists on a log-scaled y axis: point size
# follows the log of the Spotify figure, opacity follows the figure itself,
# and only the top tier of artists is labelled.
ggplot(
  top_artists_spotify,
  aes(
    x = Artist,
    y = Total.spotify,
    size = log(Total.spotify)
  )
) +
  geom_point(
    aes(alpha = Total.spotify),
    shape = 21,
    color = "#7e7b77",
    fill = "#6cd980"
  ) +
  geom_text(
    data = top_labels,
    aes(label = Artist),
    size = 2.5,
    color = "#1b2322",
    hjust = 0.5,
    vjust = 0.5
  ) +
  labs(
    title = "Najczęściej streamowani artyści na Spotify",
    subtitle = "Globalnie - sumaryzacja",
    x = "Artysta",
    y = "Ilość streamów"
  ) +
  scale_size(range = c(1, 20)) +
  scale_alpha_continuous(range = c(0.3, 1)) +
  scale_y_log10() +
  theme_fivethirtyeight() +
  scale_fill_fivethirtyeight() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
    panel.grid.minor = element_line(color = "grey95", linewidth = 0.25),
    plot.title = element_text(size = 13),
    plot.subtitle = element_text(size = 10)
  )

# TODO: repel only the labels that actually need it

# TODO: build the same chart for Poland

Wersja interactive

# Interactive (plotly) version of the Spotify bubble chart; all three label
# tiers are drawn, each with a smaller text size than the one before.
ggplotly(
  ggplot(
    top_artists_spotify,
    aes(
      x = Artist,
      y = Total.spotify,
      size = log(Total.spotify)
    )
  ) +
    geom_point(
      aes(alpha = Total.spotify),
      shape = 21,
      color = "#7e7b77",
      fill = "#6cd980"
    ) +
    geom_text(
      data = top_labels,
      aes(label = Artist),
      size = 2.5,
      color = "#1b2322",
      hjust = 0.5,
      vjust = 0.5
    ) +
    geom_text(
      data = mid_labels,
      aes(label = Artist),
      size = 2,
      color = "#1b2322",
      hjust = 0.5,
      vjust = 0.5
    ) +
    geom_text(
      data = bot_labels,
      aes(label = Artist),
      size = 1,
      color = "#1b2322",
      hjust = 0.5,
      vjust = 0.5
    ) +
    labs(
      title = "Najczęściej streamowani artyści na Spotify",
      subtitle = "Globalnie - wersja interaktywna",
      x = "Artysta",
      y = "Ilość streamów"
    ) +
    scale_size(name = "Sum of Listeners/Views", range = c(3, 15)) +
    scale_alpha_continuous(range = c(0.3, 1)) +
    scale_y_log10() +
    theme_fivethirtyeight() +
    scale_fill_fivethirtyeight() +
    theme(
      legend.position = "none",
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
      panel.grid.minor = element_line(color = "grey95", linewidth = 0.25),
      plot.title = element_text(size = 13),
      plot.subtitle = element_text(size = 10)
    )
)

Wizualizacja - najchętniej słuchani artyści na Spotify - w Polsce - podsumowanie

Najczęściej słuchane gatunki - Apple Music - worldwide

# Number of Apple Music artists per unified genre, most frequent genre first.
preferable_genres_apple <- suppressWarnings({
  apple_songs_artisttotals %>%
    # str_replace_all() removes every thousands separator; str_replace() only
    # removed the first one, so totals with two or more commas (e.g.
    # "11,442,493") became NA and those artists were silently dropped.
    mutate(across(c("Total", "Today"), ~ str_replace_all(., ",", ""))) %>%
    # NOTE(review): the cross-platform table uses the Apple total unscaled;
    # the factor below does not affect the genre count, but confirm the unit
    # before reusing these values.
    mutate(across(c("Total", "Today"), ~ as.numeric(.) * 1000000)) %>%
    left_join(
      OSet_artists_genre,
      by = "Artist",
      suffix = c(".apple", ".oset")
    ) %>%
    # Drop only the rows that found no genre in the join; a bare drop_na()
    # also discarded artists with a missing value in any other column.
    drop_na(Genre) %>%
    # Remove artists whose genre is blank. This replaces the positional
    # [-6, ] removal, which only worked while the blank genre sat in row 6.
    filter(trimws(Genre) != "") %>%
    select(
      Artist, Genre, Total
    ) %>%
    group_by(Genre) %>%
    count() %>%
    arrange(desc(n))
})

head(preferable_genres_apple)
## # A tibble: 6 × 2
## # Groups:   Genre [6]
##   Genre                n
##   <chr>            <int>
## 1 pop                 29
## 2 rap                 14
## 3 rock                 9
## 4 adult standards      3
## 5 latin                3
## 6 australian dance     2
# Bar chart of the six most frequent genres among Apple Music artists
ggplot(
  head(preferable_genres_apple),
  aes(x = reorder(Genre, -n), y = n)
) +
  geom_col(width = 0.5, fill = "#fa2a44") +
  labs(
    title = "Najczęściej słuchane gatunki na Apple Music",
    subtitle = "Globalnie",
    x = "",
    y = ""
  ) +
  theme_fivethirtyeight() +
  scale_fill_fivethirtyeight() +
  theme(
    plot.title = element_text(size = 13),
    plot.subtitle = element_text(size = 10)
  )

Wizualizacja - najchętniej słuchani artyści na Apple Music - worldwide - summarized

# The 100 artists with the highest Apple Music total
top_artists_apple <- artists_genre_comparison %>%
  select(Artist, Total.apple) %>%
  top_n(100, wt = Total.apple)

# Split the artists into three tiers so each tier can get its own label size
top_labels <- top_artists_apple %>%
  top_n(20, wt = Total.apple)

bot_labels <- top_artists_apple %>%
  top_n(-18, wt = Total.apple)

# Middle tier: every artist in neither the top nor the bottom tier.
# The rows are still ordered by the combined cross-platform total rather than
# by Total.apple, so taking rows 21..(n - 18) by position picked artists
# that already belong to the other tiers and skipped genuine mid-tier ones.
mid_labels <- top_artists_apple %>%
  filter(!Artist %in% c(top_labels$Artist, bot_labels$Artist))
# Bubble chart of the top Apple Music artists on a log-scaled y axis: point
# size follows the log of the Apple total, opacity follows the total itself,
# and only the top tier of artists is labelled.
ggplot(
  top_artists_apple,
  aes(
    x = Artist,
    y = Total.apple,
    size = log(Total.apple)
  )
) +
  geom_point(
    aes(alpha = Total.apple),
    shape = 21,
    color = "#7e7b77",
    fill = "#e04a5d"
  ) +
  geom_text(
    data = top_labels,
    aes(label = Artist),
    size = 2.5,
    color = "#1b2322",
    hjust = 0.5,
    vjust = 0.5
  ) +
  labs(
    title = "Najczęściej streamowani artyści na Apple Music",
    subtitle = "Globalnie - sumaryzacja",
    x = "Artysta",
    y = "Ilość streamów"
  ) +
  scale_size(range = c(1, 20)) +
  scale_alpha_continuous(range = c(0.3, 1)) +
  scale_y_log10() +
  theme_fivethirtyeight() +
  scale_fill_fivethirtyeight() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
    panel.grid.minor = element_line(color = "grey95", linewidth = 0.25),
    plot.title = element_text(size = 13),
    plot.subtitle = element_text(size = 10)
  )

# TODO: repel only the labels that actually need it

# TODO: build the same chart for Poland

Wersja interactive

# Interactive (plotly) version of the Apple Music bubble chart; all three
# label tiers are drawn, each with a smaller text size than the one before.
ggplotly(
  ggplot(
    top_artists_apple,
    aes(
      x = Artist,
      y = Total.apple,
      size = log(Total.apple)
    )
  ) +
    geom_point(
      aes(alpha = Total.apple),
      shape = 21,
      color = "#7e7b77",
      fill = "#e04a5d"
    ) +
    geom_text(
      data = top_labels,
      aes(label = Artist),
      size = 2.5,
      color = "#1b2322",
      hjust = 0.5,
      vjust = 0.5
    ) +
    geom_text(
      data = mid_labels,
      aes(label = Artist),
      size = 2,
      color = "#1b2322",
      hjust = 0.5,
      vjust = 0.5
    ) +
    geom_text(
      data = bot_labels,
      aes(label = Artist),
      size = 1,
      color = "#1b2322",
      hjust = 0.5,
      vjust = 0.5
    ) +
    labs(
      title = "Najczęściej streamowani artyści na Apple Music",
      subtitle = "Globalnie - wersja interaktywna",
      x = "Artysta",
      y = "Ilość streamów"
    ) +
    scale_size(name = "Sum of Listeners/Views", range = c(3, 15)) +
    scale_alpha_continuous(range = c(0.3, 1)) +
    scale_y_log10() +
    theme_fivethirtyeight() +
    scale_fill_fivethirtyeight() +
    theme(
      legend.position = "none",
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
      panel.grid.minor = element_line(color = "grey95", linewidth = 0.25),
      plot.title = element_text(size = 13),
      plot.subtitle = element_text(size = 10)
    )
)

Najczęściej słuchane gatunki - YouTube - worldwide

# Number of YouTube artists per unified genre, most frequent genre first.
preferable_genres_yt <- suppressWarnings({
  youtube_archive %>%
    # str_replace_all() removes every thousands separator; str_replace() only
    # removed the first one, so values with two or more commas became NA.
    mutate(across(c("Total", "100M"), ~ str_replace_all(., ",", ""))) %>%
    mutate(across(c("Total", "100M"), ~ as.numeric(.) * 1000000)) %>%
    left_join(
      OSet_artists_genre,
      by = "Artist",
      suffix = c(".youtube", ".oset")
    ) %>%
    # Drop only the rows that found no genre in the join; a bare drop_na()
    # also discarded artists with a missing value in any other column.
    drop_na(Genre) %>%
    select(
      Artist, Genre, Total
    ) %>%
    group_by(Genre) %>%
    count() %>%
    arrange(desc(n))
})


head(preferable_genres_yt)
## # A tibble: 6 × 2
## # Groups:   Genre [6]
##   Genre                 n
##   <chr>             <int>
## 1 pop                 280
## 2 rap                 112
## 3 rock                 81
## 4 alternative metal    39
## 5 latin                32
## 6 country              28
# Bar chart of the six most frequent genres among YouTube artists
ggplot(
  head(preferable_genres_yt),
  aes(x = reorder(Genre, -n), y = n)
) +
  geom_col(width = 0.5, fill = "#db0000") +
  labs(
    title = "Najczęściej słuchane gatunki na YouTube",
    subtitle = "Globalnie",
    x = "",
    y = ""
  ) +
  theme_fivethirtyeight() +
  scale_fill_fivethirtyeight() +
  theme(
    plot.title = element_text(size = 13),
    plot.subtitle = element_text(size = 10)
  )

Wizualizacja - najchętniej słuchani artyści na YouTube - worldwide - summarized

# The 100 artists with the highest YouTube total
top_artists_yt <- artists_genre_comparison %>%
  select(Artist, Total.youtube) %>%
  top_n(100, wt = Total.youtube)

# Split the artists into three tiers so each tier can get its own label size
top_labels <- top_artists_yt %>%
  top_n(20, wt = Total.youtube)

bot_labels <- top_artists_yt %>%
  top_n(-18, wt = Total.youtube)

# Middle tier: every artist in neither the top nor the bottom tier.
# The rows are still ordered by the combined cross-platform total rather than
# by Total.youtube, so taking rows 21..(n - 18) by position picked artists
# that already belong to the other tiers and skipped genuine mid-tier ones.
mid_labels <- top_artists_yt %>%
  filter(!Artist %in% c(top_labels$Artist, bot_labels$Artist))
# Bubble chart of the top YouTube artists on a log-scaled y axis: point size
# follows the log of the YouTube total, opacity follows the total itself,
# and only the top tier of artists is labelled.
ggplot(
  top_artists_yt,
  aes(
    x = Artist,
    y = Total.youtube,
    size = log(Total.youtube)
  )
) +
  geom_point(
    aes(alpha = Total.youtube),
    shape = 21,
    color = "#7e7b77",
    fill = "#deb3b1"
  ) +
  geom_text(
    data = top_labels,
    aes(label = Artist),
    size = 2.5,
    color = "#1b2322",
    hjust = 0.5,
    vjust = 0.5
  ) +
  labs(
    title = "Najczęściej streamowani artyści na YouTube",
    subtitle = "Globalnie - sumaryzacja",
    x = "Artysta",
    y = "Ilość streamów"
  ) +
  scale_size(range = c(1, 20)) +
  scale_alpha_continuous(range = c(0.3, 1)) +
  scale_y_log10() +
  theme_fivethirtyeight() +
  scale_fill_fivethirtyeight() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
    panel.grid.minor = element_line(color = "grey95", linewidth = 0.25),
    plot.title = element_text(size = 13),
    plot.subtitle = element_text(size = 10)
  )

# TODO: repel only the labels that actually need it

# TODO: build the same chart for Poland

Wersja interactive

# Interactive (plotly) version of the YouTube bubble chart; all three label
# tiers are drawn, each with a smaller text size than the one before.
ggplotly(
  ggplot(
    top_artists_yt,
    aes(
      x = Artist,
      y = Total.youtube,
      size = log(Total.youtube)
    )
  ) +
    geom_point(
      aes(alpha = Total.youtube),
      shape = 21,
      color = "#7e7b77",
      fill = "#deb3b1"
    ) +
    geom_text(
      data = top_labels,
      aes(label = Artist),
      size = 2.5,
      color = "#1b2322",
      hjust = 0.5,
      vjust = 0.5
    ) +
    geom_text(
      data = mid_labels,
      aes(label = Artist),
      size = 2,
      color = "#1b2322",
      hjust = 0.5,
      vjust = 0.5
    ) +
    geom_text(
      data = bot_labels,
      aes(label = Artist),
      size = 1,
      color = "#1b2322",
      hjust = 0.5,
      vjust = 0.5
    ) +
    labs(
      title = "Najczęściej streamowani artyści na YouTube",
      subtitle = "Globalnie - wersja interaktywna",
      x = "Artysta",
      y = "Ilość streamów"
    ) +
    scale_size(name = "Sum of Listeners/Views", range = c(3, 15)) +
    scale_alpha_continuous(range = c(0.3, 1)) +
    scale_y_log10() +
    theme_fivethirtyeight() +
    scale_fill_fivethirtyeight() +
    theme(
      legend.position = "none",
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
      panel.grid.minor = element_line(color = "grey95", linewidth = 0.25),
      plot.title = element_text(size = 13),
      plot.subtitle = element_text(size = 10)
    )
)